package fr.polisons.reader; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.List; import org.htmlcleaner.CleanerProperties; import org.htmlcleaner.HtmlCleaner; import org.htmlcleaner.TagNode; import org.htmlcleaner.XPatherException; import android.graphics.Bitmap; import android.graphics.BitmapFactory; import android.graphics.drawable.BitmapDrawable; import android.graphics.drawable.Drawable; import android.text.Html; import android.util.Log; public class News { private static final String ERROR = "ERROR"; private HtmlCleaner pageParser; private TagNode rootNode; private INewsData datas; private List<INewsData> newsData; // Initialize Class public News() { // Create HtmlCleaner object to turn the page into // XML that we can analyze to get the songs from the page. pageParser = new HtmlCleaner(); CleanerProperties props = pageParser.getProperties(); props.setAllowHtmlInsideAttributes(true); props.setAllowMultiWordAttributes(true); props.setRecognizeUnicodeChars(true); props.setOmitComments(true); props.setUseCdataForScriptAndStyle(true); } // Open web site URL public void open(String url) throws IOException { rootNode = pageParser.clean(new URL(url)); } // extract an attribute from an element // Param attribute name // TagNode to work on private String extractAttribute(String type, TagNode node) { TagNode[] tags = node.getElementsHavingAttribute(type, true); String value = ""; for(TagNode t: tags){ value = t.getAttributeByName(type); } return Html.fromHtml(value).toString(); } // extract a text from an element // Param Element name // TagNode to work on private String extractElement(String element, TagNode node) { TagNode[] tags = node.getElementsByName(element, true); String value = ""; for(TagNode t: tags){ if (t.getChildren().size() > 0) { value = t.getChildren().get(0).toString(); } } return Html.fromHtml(value).toString(); } private String extractText(TagNode node, boolean keepHtmlTags) { StringBuilder value = new StringBuilder(); if (node.getChildren().size() > 0) { for (int i=0; i< node.getChildren().size(); i++) { if (node.getChildren().get(i).toString().equals("strong")) { value.append("<b>"+((TagNode)node.getChildren().get(i)).getText().toString() +"</b>"); } else if (node.getChildren().get(i).toString().equals("img")) { value.append("<br>"); } else if (node.getChildren().get(i).toString().equals("br")) { value.append("<br>"); } else if (node.getChildren().get(i).toString().equals("a")) { value.append(((TagNode)node.getChildren().get(i)).getText().toString()); } else if (node.getChildren().get(i).toString().equals("small")) { value.append("<small>"+((TagNode)node.getChildren().get(i)).getText().toString() +"</small>"); } else if (node.getChildren().get(i).toString() != null) { value.append(node.getChildren().get(i).toString() ); } } } // Do you want to keep Html tags if (keepHtmlTags) { return value.toString(); } else { // To this to reformat encoded character and remove html tags like <br> return Html.fromHtml(value.toString()).toString(); } } private Bitmap downloadImage(String urL) { Bitmap bitmap = null; InputStream in = null; try { in = openHttpConnection("http://www.poli-sons.fr/"+urL); bitmap = BitmapFactory.decodeStream(in); in.close(); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } return bitmap; } private InputStream openHttpConnection(String urlString) throws IOException { InputStream in = null; int response = -1; URL url = new URL(urlString); URLConnection conn = url.openConnection(); if (!(conn instanceof HttpURLConnection)) { throw new IOException("Not an HTTP connection"); } try{ HttpURLConnection httpConn = (HttpURLConnection) conn; httpConn.setAllowUserInteraction(false); httpConn.setInstanceFollowRedirects(true); httpConn.setRequestMethod("GET"); httpConn.connect(); response = httpConn.getResponseCode(); if (response == HttpURLConnection.HTTP_OK) { in = httpConn.getInputStream(); } } catch (Exception ex) { throw new IOException("Error connecting"); } return in; } public List<INewsData> getNews(String xPathExpression) { newsData = new ArrayList<INewsData>(); try { // Stupid API returns Object[]... Why not TagNodes? We'll cast it later Object[] downloadNodes = rootNode.evaluateXPath(xPathExpression); // Create data structure // Iterate through the nodes selected by the XPath statement... for(Object linkNode : downloadNodes){ // Recursively find all nodes which have "href" (link) attributes. Then, store // the link values in an ArrayList. Create a new ArchiveSongObj with these links // and the title of the track, which is the inner HTML of the first child node. datas = new INewsData(); datas.title = extractAttribute("title", (TagNode)linkNode);//pageParser.getInnerHtml(((TagNode)((TagNode)linkNode).getChildren().get(0))).trim(); datas.link = extractAttribute("href", (TagNode)linkNode); datas.description = extractElement("p", (TagNode)linkNode); datas.date = extractElement("abbr", (TagNode)linkNode); datas.image = new BitmapDrawable(downloadImage(extractAttribute("src", (TagNode)linkNode))); Log.d("PoliSons", "Title is " + datas.title); Log.d("PoliSons", "Link is " + datas.link); Log.d("PoliSons", "Description is " + datas.description); Log.d("PoliSons", "Date is " + datas.date); newsData.add( datas); } } catch (XPatherException e) { Log.e(ERROR, e.getMessage()); } return newsData; } public List<INewsData> getPodcast(String xPathExpression) { newsData = new ArrayList<INewsData>(); try { // Stupid API returns Object[]... Why not TagNodes? We'll cast it later Object[] downloadNodes = rootNode.evaluateXPath(xPathExpression); // Create data structure // Iterate through the nodes selected by the XPath statement... for(Object linkNode : downloadNodes){ // Recursively find all nodes which have "href" (link) attributes. Then, store // the link values in an ArrayList. Create a new ArchiveSongObj with these links // and the title of the track, which is the inner HTML of the first child node. datas = new INewsData(); datas.title = extractText((TagNode)linkNode , false); datas.title = datas.title.substring( datas.title.lastIndexOf("\n")+1 ); //datas.title = extractAttribute("title", (TagNode)linkNode);//pageParser.getInnerHtml(((TagNode)((TagNode)linkNode).getChildren().get(0))).trim(); datas.link = extractAttribute("href", (TagNode)linkNode); //datas.description = extractElement("p", (TagNode)linkNode); //datas.date = extractElement("abbr", (TagNode)linkNode); Bitmap img = downloadImage(extractAttribute("src", (TagNode)linkNode)); if (img != null) { datas.image = new BitmapDrawable(img); } Log.d("PoliSons", "Title is " + datas.title); Log.d("PoliSons", "Link is " + datas.link); Log.d("PoliSons", "Description is " + datas.description); Log.d("PoliSons", "Date is " + datas.date); newsData.add( datas); } } catch (XPatherException e) { Log.e(ERROR, e.getMessage()); } return newsData; } public String getNewsDetail(String xPathExpression, String type, String value) { String data = ""; try { // Stupid API returns Object[]... Why not TagNodes? We'll cast it later Object[] downloadNodes = rootNode.evaluateXPath(xPathExpression); for(Object linkNode : downloadNodes){ // Create data structure if (type.equals("attribute")) { data = extractAttribute(value, (TagNode)linkNode); } if (type.equals("element")) { data += extractElement(value, (TagNode)linkNode); } if (type.equals("text")) { data += extractText((TagNode)linkNode, true)+"<br>"; } Log.d("PoliSons", "Data extracted is " + data); } } catch (XPatherException e) { Log.e(ERROR, e.getMessage()); } return data; } public Drawable getNewsImage(String xPathExpression, String type, String value) { Drawable img = null; try { // Stupid API returns Object[]... Why not TagNodes? We'll cast it later Object[] downloadNodes = rootNode.evaluateXPath(xPathExpression); // Create data structure // Iterate through the nodes selected by the XPath statement... for(Object linkNode : downloadNodes){ // Recursively find all nodes which have "href" (link) attributes. Then, store // the link values in an ArrayList. Create a new ArchiveSongObj with these links // and the title of the track, which is the inner HTML of the first child node. String imglink = ((TagNode)linkNode).getAttributeByName(value); img = new BitmapDrawable(downloadImage(imglink)); return img; } } catch (XPatherException e) { Log.e(ERROR, e.getMessage()); } return img; } }